Show code
# Load required packages
library(tidyverse)
library(sf)
library(httr2)Hajara Muzammal
New York City’s Department of Parks and Recreation manages nearly 900,000 trees across all five boroughs, providing critical environmental benefits to millions of residents, yet access to this green infrastructure remains unequal across neighborhoods. This project analyzes the NYC TreeMap dataset alongside City Council District boundaries to explore patterns in tree distribution, health, and species diversity at the district level. Using spatial analysis and data visualization, we identify opportunities for targeted intervention and propose a data-driven tree revitalization program that addresses gaps in the urban canopy and promotes environmental equity.
Data Preparation
# ============================================================================
# Task 1: Download NYC City Council District Boundaries
# ============================================================================
download_council_districts <- function(
url = "https://www.nyc.gov/assets/planning/download/zip/data-maps/open-data/nycc_24d.zip",
data_dir = "data/mp03",
zip_file = "nyc_council_districts.zip"
) {
# Step 1: Create directory if it doesn't exist
if (!dir.exists(data_dir)) {
dir.create(data_dir, recursive = TRUE)
message("Created directory: ", data_dir)
}
# Step 2: Download zip file only if needed
zip_path <- file.path(data_dir, zip_file)
if (!file.exists(zip_path)) {
message("Downloading council district boundaries...")
download.file(url, zip_path, mode = "wb")
message("Download complete: ", zip_path)
} else {
message("Zip file already exists, skipping download")
}
# Step 3: Unzip the file only if needed
# Check if shp file already exists
shp_files <- list.files(data_dir, pattern = "\\.shp$", full.names = TRUE)
if (length(shp_files) == 0) {
message("Unzipping file...")
unzip(zip_path, exdir = data_dir)
message("Unzip complete")
# Get the shp file after unzipping
shp_files <- list.files(data_dir, pattern = "\\.shp$", full.names = TRUE)
} else {
message("Shapefile already exists, skipping unzip")
}
# Step 4: Read the shapefile using st_read
message("Reading shapefile...")
districts <- st_read(shp_files[1], quiet = TRUE)
message("Shapefile read successfully")
# Step 5: Transform to WGS 84
message("Transforming to WGS84 coordinate system...")
districts_wgs84 <- st_transform(districts, crs = "WGS84")
message("Transformation complete!")
message("Number of council districts: ", nrow(districts_wgs84))
# Step 6: Return the transformed data
return(districts_wgs84)
}
# Execute the function
council_districts <- download_council_districts()
# View the data
glimpse(council_districts)Rows: 51
Columns: 4
$ coun_dist <dbl> 42, 45, 20, 21, 22, 19, 30, 29, 51, 23, 6, 7, 17, 40, 48, 1…
$ shape_leng <dbl> 117530.81, 56967.63, 61223.01, 70355.16, 86774.78, 117797.0…
$ shape_area <dbl> 411895232, 117904762, 144833269, 150651760, 186235161, 4795…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-73.86078 4..., MULTIPOLYGON (…
Council Districts loaded: 51
Above is a visual of NYC Council Districts.
# ============================================================================
# Task 2: Download NYC Tree Points Data
# ============================================================================
download_tree_points <- function(
base_url = "https://data.cityofnewyork.us/resource/hn5i-inap.geojson",
limit = 5000,
data_dir = "data/mp03/trees",
pause = 1
) {
# Load required packages
library(httr2)
library(sf)
library(dplyr)
# Create trees directory if it doesn't exist
if (!dir.exists(data_dir)) {
dir.create(data_dir, recursive = TRUE)
message("Created directory: ", data_dir)
}
# ---- Phase 1: Download all pages ----
files <- character()
offset <- 0L
page <- 1L
message("Starting download of NYC Tree Points data...")
message("Using limit = ", limit, " records per page\n")
repeat {
# Construct filename with consistent naming schema
fname <- file.path(data_dir,
sprintf("treepoints_%08d_%05d.geojson", offset, limit))
# Only download if file doesn't exist
if (!file.exists(fname)) {
message("Downloading page ", page, " (offset: ", offset, ")...")
# Build request using httr2
req <- request(base_url) |>
req_url_query(
`$limit` = limit,
`$offset` = offset
) |>
req_timeout(300) |> # 5 minute timeout
req_retry(max_tries = 3) # Retry failed requests
# Perform request with error handling
tryCatch({
resp <- req_perform(req)
writeBin(resp_body_raw(resp), fname)
message(" Saved to: ", basename(fname))
Sys.sleep(pause) # Be respectful to the API
}, error = function(e) {
message(" Error: ", e$message)
message(" Retrying in 5 seconds...")
Sys.sleep(5)
resp <- req_perform(req)
writeBin(resp_body_raw(resp), fname)
})
} else {
message("Page ", page, " already exists (", basename(fname), "), skipping download")
}
# Read the file to check how many records we got
trees_page <- try(suppressWarnings(st_read(fname, quiet = TRUE)), silent = TRUE)
nret <- if (inherits(trees_page, "sf")) nrow(trees_page) else 0L
message(" Page ", page, " contains ", nret, " records")
files <- c(files, fname)
# If we got fewer records than limit, we've reached the end
if (nret < limit) {
message("\nReached end of dataset at page ", page)
message("Total pages downloaded: ", page, "\n")
break
}
# Update for next iteration
offset <- offset + limit
page <- page + 1L
}
# ---- Phase 2: Read all GeoJSON files ----
message("Reading and combining ", length(files), " GeoJSON files...")
all_trees <- lapply(files, function(f) {
df <- st_read(f, quiet = TRUE)
geom_col <- attr(df, "sf_column")
char_cols <- setdiff(names(df), geom_col)
for (col in char_cols) {
df[[col]] <- as.character(df[[col]])
}
return(df)
})
message("Combining all datasets with bind_rows()...")
trees_combined <- bind_rows(all_trees)
# Convert columns to proper types after combining
message("Converting columns to proper data types...")
# Convert date columns
if ("planteddate" %in% names(trees_combined)) {
trees_combined$planteddate <- as.POSIXct(
trees_combined$planteddate,
format = "%Y-%m-%dT%H:%M:%S",
tz = "UTC"
)
}
if ("curb_loc" %in% names(trees_combined)) {
trees_combined$curb_loc <- as.character(trees_combined$curb_loc)
}
# Convert numeric columns
numeric_cols <- c("tree_dbh", "stump_diam", "zipcode", "zip_city",
"cb_num", "borocode", "latitude", "longitude",
"council_district", "census_tract", "bin", "bbl")
for (col in numeric_cols) {
if (col %in% names(trees_combined)) {
trees_combined[[col]] <- as.numeric(trees_combined[[col]])
}
}
message("\n===========================================")
message("Download and combination complete!")
message("Total records: ", format(nrow(trees_combined), big.mark = ","))
message("Total columns: ", ncol(trees_combined))
message("===========================================\n")
return(trees_combined)
}
# Execute the download function
trees <- download_tree_points(limit = 5000, pause = 1)
# Verify the data
glimpse(trees)Rows: 1,094,587
Columns: 14
$ tpcondition <chr> "Excellent", "Good", "Poor", "Fair", "Dead", "Fa…
$ stumpdiameter <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ riskratingdate <chr> NA, NA, NA, "2024-06-28 12:41:55", NA, NA, NA, N…
$ riskrating <chr> NA, NA, NA, "6", NA, NA, NA, NA, NA, NA, "7", NA…
$ objectid <chr> "86823", "87623", "88023", "88823", "88824", "88…
$ globalid <chr> "2B457A4C-E0E4-4E17-81C4-A5449F51C804", "37195E1…
$ tpstructure <chr> "Full", "Retired", "Retired", "Full", "Retired",…
$ plantingspaceglobalid <chr> "E814CD37-9F53-4D79-AF86-3B454F9D29B9", "A644AB7…
$ createddate <chr> "2015-02-28 05:00:00", "2015-03-03 05:00:00", "2…
$ dbh <chr> "20", "10", "24", "10", "10", "19", "12", "8", "…
$ planteddate <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ updateddate <chr> "2016-10-20 17:43:53", "2019-09-18 13:12:55", "2…
$ genusspecies <chr> "Acer nigrum - black maple", "Fraxinus pennsylva…
$ geometry <POINT [°]> POINT (-73.81657 40.71629), POINT (-73.938…
=== NYC Trees Dataset Summary ===
Total trees: 1,094,587
Variables: 14
Coordinate system: WGS 84
# Task 3: Create map with trees over districts
map_all_trees <- ggplot() +
geom_sf(data = council_districts, fill = "white", color = "black", linewidth = 0.5) +
geom_sf(data = trees, color = "darkgreen", size = 0.05, alpha = 0.2) +
theme_minimal() +
labs(title = "NYC Street Trees by Council District",
subtitle = paste("Showing", format(nrow(trees), big.mark = ","), "trees")) +
theme(axis.text = element_blank(), axis.title = element_blank(), panel.grid = element_blank())
print(map_all_trees)The NYC Street Trees has 1,094,587 trees.
Trees class: sf data.frame
coun_dist num_trees
1 51 70966
2 50 52500
3 19 49940
4 23 44917
5 13 36673
6 49 35117
7 39 32403
8 31 31321
9 32 30270
10 27 29395
11 11 27854
12 24 26287
13 28 25544
14 46 24330
15 30 23012
16 47 21340
17 20 20717
18 29 19994
19 42 18935
20 33 18861
21 21 18791
22 17 18589
23 15 18422
24 48 18237
25 8 18207
26 22 17973
27 44 17641
28 45 17267
29 18 17132
30 38 16521
31 12 16168
32 34 15875
33 7 15648
34 37 15637
35 26 15380
36 10 15310
37 35 15109
38 41 14416
39 16 13497
40 36 13472
41 9 13455
42 1 12259
43 6 12139
44 3 12002
45 40 11934
46 2 11563
47 4 11095
48 14 10905
49 43 10532
50 25 10166
51 5 8327
52 NA 542
# Create choropleth map
districts_with_counts <- council_districts |>
left_join(trees_by_district, by = "coun_dist") |>
mutate(num_trees = replace_na(num_trees, 0))
ggplot(districts_with_counts) +
geom_sf(aes(fill = num_trees), color = "white", linewidth = 0.3) +
scale_fill_viridis_c(name = "Number of Trees", labels = scales::comma) +
theme_minimal() +
labs(title = "Tree Distribution by Council District") +
theme(axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank()) coun_dist num_trees
1 51 70966
q2_tree_density <- trees_with_districts |>
st_drop_geometry() |>
filter(!is.na(coun_dist)) |>
count(coun_dist, name = "num_trees") |>
left_join(
council_districts |>
st_drop_geometry() |>
select(coun_dist, shape_area),
by = "coun_dist"
) |>
mutate(tree_density = num_trees / shape_area) |>
arrange(desc(tree_density)) |>
slice(1)
q2_tree_density coun_dist num_trees shape_area tree_density
1 39 32403 118294552 0.0002739179
# Based on your data, use tpstructure column
# "Retired" likely means dead/removed trees
q3_dead_trees <- trees_with_districts |>
st_drop_geometry() |>
filter(!is.na(coun_dist)) |>
group_by(coun_dist) |>
summarise(
total_trees = n(),
# Use tpstructure == "Retired" or "Stump" for dead trees
retired_trees = sum(tpstructure %in% c("Retired", "Stump"), na.rm = TRUE),
fraction_retired = retired_trees / total_trees,
.groups = "drop"
) |>
arrange(desc(fraction_retired)) |>
slice(1)
q3_dead_trees# A tibble: 1 × 4
coun_dist total_trees retired_trees fraction_retired
<dbl> <int> <int> <dbl>
1 32 30270 7047 0.233
Checking for species columns:
[1] "genusspecies"
Sample species names:
[1] "Acer nigrum - black maple"
[2] "Fraxinus pennsylvanica - Green ash"
[3] "Acer platanoides - Norway maple"
[4] "Pyrus calleryana - Callery pear"
[5] "Gleditsia triacanthos var. inermis - Thornless honeylocust"
[6] "Fraxinus americana - white ash"
[7] "Zelkova serrata - Japanese zelkova"
[8] "Acer platanoides - Norway maple"
[9] "Tilia cordata - littleleaf linden"
[10] "Abies alba - silver fir"
# Add borough column based on council district
trees_with_borough <- trees_with_districts |>
mutate(
borough = case_when(
coun_dist >= 1 & coun_dist <= 10 ~ "Manhattan",
coun_dist >= 11 & coun_dist <= 18 ~ "Bronx",
coun_dist >= 19 & coun_dist <= 32 ~ "Queens",
coun_dist >= 33 & coun_dist <= 48 ~ "Brooklyn",
coun_dist >= 49 & coun_dist <= 51 ~ "Staten Island",
TRUE ~ NA_character_
)
)
# Verify the borough assignment worked
cat("\nTrees by borough:\n")
Trees by borough:
borough n
1 Queens 363707
2 Brooklyn 282510
3 Bronx 159240
4 Staten Island 158583
5 Manhattan 130005
6 <NA> 542
genusspecies n
1 Gleditsia triacanthos var. inermis - Thornless honeylocust 17310
# ============================================================================
# Question 5: Species of tree closest to Baruch's campus?
# ============================================================================
library(sf)
library(dplyr)
# Helper function to create a spatial point
new_st_point <- function(lat, lon, ...){
# st_sfc expects x, y which flips the normal lat (N/S) + lon (W/E) ordering
st_sfc(st_point(c(lon, lat))) |>
st_set_crs("WGS84")
}
# Baruch College coordinates
# Address: 55 Lexington Ave (at 24th St), New York, NY 10010
# Latitude: 40.7403, Longitude: -73.9830
baruch_location <- new_st_point(lat = 40.7403, lon = -73.9830)
# Verify the point was created correctly
cat("Baruch location created:\n")Baruch location created:
Geometry set for 1 feature
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -73.983 ymin: 40.7403 xmax: -73.983 ymax: 40.7403
Geodetic CRS: WGS 84
# Make sure trees geometry is valid and in WGS84
trees_clean <- trees |>
filter(!st_is_empty(geometry)) |>
st_transform(crs = 4326) # Ensure WGS84
# Find the tree closest to Baruch
q5_closest_tree <- trees_clean |>
mutate(distance_m = as.numeric(st_distance(geometry, baruch_location))) |>
arrange(distance_m) |>
slice(1) |>
st_drop_geometry() |>
select(genusspecies, tpcondition, dbh, distance_m)
q5_closest_tree genusspecies tpcondition dbh distance_m
1 Pyrus calleryana - Callery pear Fair 7 8.492875
I will be analyzing all the data that is needed for this proposal.
# ============================================================================
# Task 5: NYC Parks Proposal - District Tree Program
# ============================================================================
library(tidyverse)
library(sf)
library(scales)
# ============================================================================
# STEP 1: Choose Your District
# ============================================================================
# Choose one of these options:
# Option 1: Baruch's district (District 2 - Manhattan)
my_district <- 2
# Option 2: Your district (replace with your district number)
# my_district <- XX
# Get your district data
my_district_boundary <- council_districts |>
filter(coun_dist == my_district)
# Get trees in your district
my_district_trees <- trees_with_districts |>
filter(coun_dist == my_district)# ============================================================================
# STEP 2: Choose Your Project Focus
# ============================================================================
# Example: Replace Dead/Retired Trees and Stumps
# Count trees by structure type
tree_structure_summary <- my_district_trees |>
st_drop_geometry() |>
count(tpstructure, sort = TRUE)
print(tree_structure_summary) tpstructure n
1 Full 8849
2 Retired 2363
3 Stump 328
4 Shaft 19
5 Stump - Uprooted 4
Stumps and retired trees in your district: 2691
# ============================================================================
# STEP 3: Quantitative Scope Statement
# ============================================================================
# Calculate project scope
project_scope <- my_district_trees |>
st_drop_geometry() |>
summarise(
total_trees = n(),
stumps = sum(tpstructure == "Stump", na.rm = TRUE),
retired = sum(tpstructure == "Retired", na.rm = TRUE),
poor_condition = sum(tpcondition == "Poor", na.rm = TRUE),
to_replace = stumps + retired
)
cat("\nProject Scope:\n")
Project Scope:
total_trees stumps retired poor_condition to_replace
1 11563 328 2363 291 2691
# ============================================================================
# STEP 4: Zoomed-in Map of Your District
# ============================================================================
# Map showing all trees in your district
map_my_district <- ggplot() +
# District boundary
geom_sf(data = my_district_boundary,
fill = "lightgray",
color = "black",
linewidth = 1.5) +
# All trees
geom_sf(data = my_district_trees,
aes(color = tpstructure),
size = 0.5,
alpha = 0.6) +
scale_color_manual(
name = "Tree Status",
values = c("Full" = "darkgreen",
"Retired" = "orange",
"Stump" = "red")
) +
theme_minimal() +
labs(
title = paste("Trees in Council District", my_district),
subtitle = paste(format(nrow(my_district_trees), big.mark = ","), "total trees")
) +
theme(
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
legend.position = "bottom"
)
print(map_my_district)# Alternative: Map showing only stumps and retired trees
map_replacement_targets <- ggplot() +
geom_sf(data = my_district_boundary,
fill = "white",
color = "black",
linewidth = 1.5) +
# Only stumps and retired trees
geom_sf(data = my_district_trees |>
filter(tpstructure %in% c("Stump", "Retired")),
aes(color = tpstructure),
size = 1,
alpha = 0.7) +
scale_color_manual(
name = "Replacement Needed",
values = c("Retired" = "orange", "Stump" = "red")
) +
theme_minimal() +
labs(
title = paste("Trees to Replace in District", my_district),
subtitle = paste(stumps_count, "stumps and retired trees")
) +
theme(
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
legend.position = "bottom"
)
print(map_replacement_targets)# ============================================================================
# STEP 5: Compare to Other Districts
# ============================================================================
# Compare stumps/retired trees across districts
district_comparison <- trees_with_districts |>
st_drop_geometry() |>
filter(!is.na(coun_dist)) |>
group_by(coun_dist) |>
summarise(
total_trees = n(),
stumps_retired = sum(tpstructure %in% c("Stump", "Retired"), na.rm = TRUE),
percent_need_replacement = (stumps_retired / total_trees) * 100,
.groups = "drop"
) |>
arrange(desc(stumps_retired))
# Get your district's rank
my_district_rank <- district_comparison |>
mutate(rank = row_number()) |>
filter(coun_dist == my_district)
cat("\nYour district ranks #", my_district_rank$rank,
"in number of stumps/retired trees\n")
Your district ranks # 34 in number of stumps/retired trees
Top 10 Districts Needing Tree Replacement:
# A tibble: 10 × 4
coun_dist total_trees stumps_retired percent_need_replacement
<dbl> <int> <int> <dbl>
1 51 70966 12994 18.3
2 50 52500 10559 20.1
3 19 49940 9723 19.5
4 23 44917 8690 19.3
5 49 35117 7082 20.2
6 32 30270 7047 23.3
7 13 36673 6948 18.9
8 27 29395 5632 19.2
9 31 31321 5576 17.8
10 11 27854 5521 19.8
# ============================================================================
# STEP 6: Non-Map Graphic (Bar Chart)
# ============================================================================
# Select districts to compare (your district + 3-4 others)
districts_to_compare <- c(my_district, 1, 3, 5) # Adjust as needed
comparison_data <- district_comparison |>
filter(coun_dist %in% districts_to_compare) |>
mutate(
is_my_district = coun_dist == my_district,
district_label = paste("District", coun_dist)
)
# Bar chart comparing stumps/retired trees
bar_chart_comparison <- ggplot(comparison_data,
aes(x = reorder(district_label, -stumps_retired),
y = stumps_retired,
fill = is_my_district)) +
geom_col() +
geom_text(aes(label = comma(stumps_retired)),
vjust = -0.5,
size = 4) +
scale_fill_manual(
values = c("TRUE" = "#E74C3C", "FALSE" = "gray70"),
guide = "none"
) +
theme_minimal() +
labs(
title = "Trees Needing Replacement by District",
subtitle = "Stumps and Retired Trees",
x = "Council District",
y = "Number of Trees"
) +
theme(
plot.title = element_text(face = "bold", size = 14),
axis.text.x = element_text(angle = 45, hjust = 1)
)
print(bar_chart_comparison)# ============================================================================
# STEP 7: Map Comparison with Other Districts
# ============================================================================
# Compare your district with nearby districts
comparison_districts <- c(my_district, 1, 3, 5) # Adjust as needed
comparison_boundaries <- council_districts |>
filter(coun_dist %in% comparison_districts)
comparison_trees <- trees_with_districts |>
filter(coun_dist %in% comparison_districts,
tpstructure %in% c("Stump", "Retired"))
map_district_comparison <- ggplot() +
geom_sf(data = comparison_boundaries,
aes(fill = factor(coun_dist)),
alpha = 0.3,
color = "black",
linewidth = 0.8) +
geom_sf(data = comparison_trees,
color = "red",
size = 0.3,
alpha = 0.5) +
scale_fill_brewer(
name = "Council District",
palette = "Set2"
) +
theme_minimal() +
labs(
title = "Tree Replacement Needs: District Comparison",
subtitle = "Red points show stumps and retired trees"
) +
theme(
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank()
)
print(map_district_comparison)
========================================
PROPOSAL SUMMARY STATISTICS
========================================
District: 2
Total trees: 11,563
Stumps: 328
Retired trees: 2,363
Trees in poor condition: 291
Total replacement needed: 2,691
Comparison with other districts:
# A tibble: 4 × 3
coun_dist stumps_retired percent_need_replacement
<dbl> <int> <dbl>
1 1 2796 22.8
2 2 2691 23.3
3 3 2541 21.2
4 5 1710 20.5
========================================
Tree Replacement Initiative — Council District 2 As a Council Member representing District 2 (Gramercy, Kips Bay, Murray Hill), I propose a tree replacement program to address our district’s critical tree maintenance needs.
🌿 Project: “ReGrow District 2” The Problem: District 2 has 11,563 trees, but 2,691 trees (23.3%) are dead, retired, or stumps—the highest replacement rate among neighboring Manhattan districts. The Solution: Remove stumps, replace dead trees, and plant new climate-resilient species
Why District 2? District 2 stands out with 23.3% of trees needing replacement—higher than Districts 1 (22.8%), 3 (21.2%), and 5 (20.5%). While District 1 has more trees overall, District 2 has the highest percentage requiring replacement, making it the most urgent priority for intervention. Our 2,691 trees needing replacement include: 328 dangerous stumps (tripping hazards) 2,363 retired/dead trees (providing no environmental benefit)
Project Scope Proposed Actions: Remove 328 dangerous stumps Replace 2,363 retired/dead trees Plant 500 NEW trees in underserved blocks Maintain 291 trees in poor condition Total Impact: 3,482 tree interventions Timeline: 18 months Budget: $1.5M ($430 per tree)
🗺️ Visualizations Map 1: Distribution of 11,563 trees in District 2 by condition Chart 2: District 2 has highest replacement needs compared to Districts 1, 3, 5 Map 3: Red points show 2,691 stumps and retired trees requiring immediate action.
✅ Expected Outcomes Improved air quality and reduced heat island effect Enhanced public safety by removing 328 hazardous stumps Expanded tree canopy with 500 new plantings Healthier urban forest serving 150,000+ residents Request: $1.5M Parks Department allocation for District 2 tree revitalization.